{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Testing Deployed LLMs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install openai==1.6.1 --upgrade --user"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### VLLM Completion API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "from rich import print\n",
    "\n",
    "ip = \"4.227.148.13\"\n",
    "model = \"CognitiveLab/Project-K-MGPU-500K-FFT-train\"\n",
    "\n",
    "# Modify OpenAI's API key and API base to use vLLM's API server.\n",
    "openai_api_key = \"EMPTY\"\n",
    "openai_api_base = f\"http://{ip}:8000/v1\"\n",
    "\n",
    "system_prompt = \"What are an expert chef's\"\n",
    "user_prompt = \"What is the recipe of mayonnaise explain in detail?\"\n",
    "\n",
    "client = OpenAI(\n",
    "    api_key=openai_api_key,\n",
    "    base_url=openai_api_base,\n",
    ")\n",
    "# completion = client.completions.create(model=model,\n",
    "#                                       prompt=\"What is the recipe of mayonnaise?\")\n",
    "completion = client.completions.create(model=model,\n",
    "                                      prompt=f\"what is your name?\")\n",
    "print(\"Completion result:\", completion)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### VLLM chat API"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "\n",
    "# Modify OpenAI's API key and API base to use vLLM's API server.\n",
    "openai_api_key = \"EMPTY\"\n",
    "openai_api_base = f\"http://{ip}:8000/v1\"\n",
    "\n",
    "client = OpenAI(\n",
    "    # defaults to os.environ.get(\"OPENAI_API_KEY\")\n",
    "    api_key=openai_api_key,\n",
    "    base_url=openai_api_base,\n",
    ")\n",
    "\n",
    "# models = client.models.list()\n",
    "# model = models.data[0].id\n",
    "\n",
    "chat_completion = client.chat.completions.create(\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": \"nina hesau srinihi\"\n",
    "        }, \n",
    "        {\n",
    "            \"role\":\"assistant\",\n",
    "            \"content\":\"srinihi is my name\"\n",
    "        }, \n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": \"what is your name?\"\n",
    "        },\n",
    "    ],\n",
    "    model=model,\n",
    ")\n",
    "\n",
    "\n",
    "print(\"Chat completion results:\")\n",
    "print(chat_completion)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Text Generation Inferences Request"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "}\n",
    "\n",
    "data = {\n",
    "    'inputs': '[INST] what is the meaning of life[/INST]',\n",
    "    'parameters': {\n",
    "        'max_new_tokens': 2000,\n",
    "    },\n",
    "}\n",
    "\n",
    "response = requests.post('http://52.168.86.227:8000/generate', headers=headers, json=data)\n",
    "print(response.json())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Text Generation Inference Streaming\n",
    "\n",
    "TGI also has streaming support - [Documentation](https://huggingface.co/docs/text-generation-inference/conceptual/streaming)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from huggingface_hub import AsyncInferenceClient\n",
    "\n",
    "client = AsyncInferenceClient(\"http://127.0.0.1:8080\")\n",
    "async for token in await client.text_generation(\"How do you make cheese?\", stream=True):\n",
    "    print(token)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
